## ---------------------------
##
## Script name: process_comp_disadv_Nation.R
##
## Description: Aggregates previously-geocoded, incident-level 
## police shootings with census tract information. Cleans and
## formats columns of male incarceration data.
##
## ---------------------------
## DEPENDENCIES

library(dplyr)
library(tidyverse)
library(ggplot2)
library(stringr)
library(sf)

# City list (100 cities to be included). This statement runs as soon as the
# script is sourced, so a `file_locations` object must already be visible
# at that point.
city_list <- file_locations$current_fp %>%
  paste(file_locations$Helpers$`City List`, sep = "/") %>%
  read.csv()

## ---------------------------

# WRAPPER FUNCTION
process_comp_disadv_Nation <- function(file_locations, overwrite) {
  #'
  #'@description Wrapper function that processes police
  #'shootings and male incarceration data.
  #'
  #'@param file_locations list. File locations loaded from
  #'file_locations.R
  #'@param overwrite boolean. Determines if already processed
  #'files should be overwritten. Forwarded unchanged to both
  #'processing steps.
  #'
  #'@return NULL (invisibly)
  
  # Forward the caller's choice. This call used to hard-code overwrite=T,
  # which made the `overwrite` argument a no-op for the police-shootings step.
  .process_police_shootings_nation(file_locations, overwrite = overwrite)
  
  .process_male_incarceration_nation(file_locations, overwrite = overwrite)
  
  invisible(NULL)
}


.process_police_shootings_nation <- function(file_locations, overwrite = FALSE) {
  #'
  #'@description Reads previously-geocoded incident-level
  #'police shootings data, keeps incidents from 2014 through
  #'2020, and aggregates them to tract-year level. Every
  #'city/tract row found in the processed geographies file gets
  #'one output row per year; tract-years without any incident
  #'are filled with 0.
  #'
  #'@param file_locations list. File locations loaded from
  #'file_locations.R
  #'@param overwrite boolean. Determines if already processed 
  #'file should be overwritten.
  #'
  #'@return saves output and returns NULL (invisibly)
  
  input_fp <- file.path(file_locations$current_fp,
                        file_locations$Nation$`Police Shootings`$raw)
  output_fp <- file.path(file_locations$current_fp,
                         file_locations$Nation$`Police Shootings`$processed)
  tracts_fp <- file.path(file_locations$current_fp,
                         file_locations$Nation$Geographies$processed)
  
  # Scalar test, so use the short-circuiting operators rather than `|`.
  if (!overwrite && file.exists(output_fp)) {
    return(invisible(NULL))
  }
  
  first_year <- 2014L
  last_year <- 2020L
  
  # Left-pad the tract identifier to 11 characters so it can be matched
  # against the tract identifiers of the geographies file.
  incidents <- read.csv(input_fp) %>%
    mutate(tract_2010 = str_pad(tract_2010, 11, side = "left", pad = "0"))
  
  all_city_tracts <- read_sf(tracts_fp)
  
  # `!!` injects the local constants so they can never be confused with a
  # column of the same name in the incident data.
  shootings_by_tract_year <- incidents %>%
    filter(year >= !!first_year, year <= !!last_year) %>%
    group_by(tract_2010, year) %>%
    # na.rm = TRUE: without it a single incident with a missing `killed`
    # value turns the whole tract-year sum into NA, which the zero-fill
    # below would then silently report as 0.
    summarize(fatal_police_shootings = sum(killed, na.rm = TRUE),
              .groups = "drop")
  
  # NOTE(review): rows are not de-duplicated here; this assumes the
  # geographies file holds one row per city/tract -- confirm.
  city_tracts <- all_city_tracts %>%
    st_drop_geometry() %>%
    dplyr::select(city, tract_2010)
  
  # Full city/tract x year grid (tract varies slowest, year fastest).
  full_tract_year <- tidyr::expand_grid(city_tracts,
                                        year = seq(first_year, last_year))
  
  agg_df <- full_tract_year %>%
    left_join(shootings_by_tract_year, by = c("tract_2010", "year")) %>%
    # Tract-years with no matching incident come back as NA from the join.
    mutate(fatal_police_shootings = tidyr::replace_na(fatal_police_shootings, 0))
  
  write.csv(agg_df, output_fp, row.names = FALSE)
  
  invisible(NULL)
}



.process_male_incarceration_nation <- function(file_locations, overwrite = FALSE) {
  #'
  #'@description Cleans the male incarceration file: renames the
  #'rate column to incarc_rate and multiplies it by 10000,
  #'builds an 11-character zero-padded tract_2010 identifier,
  #'adds a constant year column (2010), and drops the Name and
  #'tract columns.
  #'
  #'@param file_locations list. File locations loaded from
  #'file_locations.R
  #'@param overwrite boolean. Determines if already processed 
  #'file should be overwritten. When FALSE and the processed
  #'file already exists, nothing is read or written.
  #'
  #'@return saves output and returns NULL (invisibly)
  
  input_fp <- file.path(file_locations$current_fp,
                        file_locations$Nation$`Male Incarceration`$raw)
  output_fp <- file.path(file_locations$current_fp,
                         file_locations$Nation$`Male Incarceration`$processed)
  
  # Honour `overwrite`: it was accepted and documented but never consulted,
  # so the processed file was rewritten on every call.
  if (!overwrite && file.exists(output_fp)) {
    return(invisible(NULL))
  }
  
  df_inc <- read.csv(input_fp) %>%
    rename(incarc_rate = Incarceration_Rate_rP_gM_pall) %>%
    mutate(tract_2010 = str_pad(tract, 11, side = "left", pad = "0"),
           incarc_rate = incarc_rate * 10000,
           year = 2010) %>%
    # Namespaced because other attached packages may mask select().
    dplyr::select(-Name, -tract)
  
  write.csv(df_inc, output_fp, row.names = FALSE)
  
  invisible(NULL)
}

